911 Calls

911 call data from Kaggle.

The data contains the following fields:

  • lat: String variable, Latitude
  • lng: String variable, Longitude
  • desc: String variable, Description of the Emergency Call
  • zip: String variable, Zipcode
  • title: String variable, Title
  • timeStamp: String variable, YYYY-MM-DD HH:MM:SS
  • twp: String variable, Township
  • addr: String variable, Address
  • e: String variable, Dummy variable (always 1)

Data and Setup

In [20]:
# Start the server with: jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10
# (the flag raises the notebook's IOPub data rate limit)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
import plotly.graph_objs as go
import plotly.plotly as py
from collections import Counter
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode,plot,iplot
In [21]:
# Enable plotly's offline notebook mode and switch cufflinks to offline
# rendering, so the .iplot() calls on DataFrames/Series draw in the notebook.
init_notebook_mode(connected = True)
cf.go_offline()
# seaborn is imported in this cell (not with the other imports) and is only
# used here to pick the 'whitegrid' style.
import seaborn as sns
sns.set_style('whitegrid')
# IPython magic: show matplotlib figures inline in the cell output.
%matplotlib inline
In [ ]:
import os

import plotly

# Secrets are read from the environment so that none are stored in the
# notebook. Export PLOTLY_USERNAME, PLOTLY_API_KEY and MAPBOX_ACCESS_TOKEN
# before starting Jupyter; a missing variable raises KeyError here rather than
# failing later inside a plotting call.
# NOTE(review): an API key and a Mapbox token were previously committed in this
# cell and should be rotated.
plotly.tools.set_credentials_file(
    username=os.environ['PLOTLY_USERNAME'],
    api_key=os.environ['PLOTLY_API_KEY'],
)

# Consumed by the Scattermapbox layout (mapbox.accesstoken) in the map cell.
mapbox_access_token = os.environ['MAPBOX_ACCESS_TOKEN']
In [4]:
def dateparse(x):
    """Parse a 'YYYY-MM-DD HH:MM:SS' string into a datetime.datetime."""
    return datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')


# These names replace the header row of the CSV (header=0). Every column is
# read as text except the dummy column 'e', which is read as an integer;
# 'timeStamp' is additionally parsed into datetimes with dateparse.
column_names = ['lat', 'lng', 'desc', 'zip', 'title', 'timeStamp', 'twp', 'addr', 'e']
column_dtypes = dict.fromkeys(column_names, str)
column_dtypes['e'] = int

df = pd.read_csv(
    "911.csv",
    header=0,
    names=column_names,
    dtype=column_dtypes,
    parse_dates=['timeStamp'],
    date_parser=dateparse,
)

Feature Engineering

In [5]:
# The call category is the text before the first ':' of the title
# (e.g. 'EMS: BACK PAINS/INJURY' -> 'EMS'). The vectorised .str accessor
# leaves a missing title as NaN instead of raising on a non-string value.
df['Reason'] = df['title'].str.split(':', n=1).str[0]
In [6]:
# Ensure the column holds pandas datetimes before the hour/month/weekday
# features are derived from it.
parsed_timestamps = pd.to_datetime(df['timeStamp'])
df['timeStamp'] = parsed_timestamps
In [7]:
# Calendar features derived from the datetime column. The vectorised .dt
# accessor replaces one Python lambda call per row.
df['Hour'] = df['timeStamp'].dt.hour
df['Month'] = df['timeStamp'].dt.month
# Integer weekday, 0 = Monday ... 6 = Sunday.
df['Day of Week'] = df['timeStamp'].dt.dayofweek
# Calendar date (datetime.date) without the time of day.
df['Date'] = df['timeStamp'].dt.date
In [8]:
# Weekday number (0 = Monday ... 6 = Sunday) -> three-letter label.
dmap = dict(enumerate(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']))
In [9]:
# Build the labels from timeStamp rather than from the 'Day of Week' column
# itself: mapping the column onto itself turns every value into NaN when the
# cell is run a second time, because the labels are not keys of dmap.
df['Day of Week'] = df['timeStamp'].dt.dayofweek.map(dmap)
In [10]:
# 'e' is a dummy column (always 1), so it is removed. A non-inplace drop with
# errors='ignore' keeps the cell re-runnable: a second run finds no 'e' column
# and leaves the frame unchanged instead of raising.
df = df.drop('e', axis=1, errors='ignore')
In [11]:
# Preview the first five rows, including the engineered columns.
df.head(n=5)
Out[11]:
lat lng desc zip title timeStamp twp addr Reason Hour Month Day of Week Date
0 40.2978759 -75.5812935 REINDEER CT & DEAD END; NEW HANOVER; Station ... 19525 EMS: BACK PAINS/INJURY 2015-12-10 17:10:52 NEW HANOVER REINDEER CT & DEAD END EMS 17 12 Thu 2015-12-10
1 40.2580614 -75.2646799 BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... 19446 EMS: DIABETIC EMERGENCY 2015-12-10 17:29:21 HATFIELD TOWNSHIP BRIAR PATH & WHITEMARSH LN EMS 17 12 Thu 2015-12-10
2 40.1211818 -75.3519752 HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... 19401 Fire: GAS-ODOR/LEAK 2015-12-10 14:39:21 NORRISTOWN HAWS AVE Fire 14 12 Thu 2015-12-10
3 40.1161530 -75.3435130 AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... 19401 EMS: CARDIAC EMERGENCY 2015-12-10 16:47:36 NORRISTOWN AIRY ST & SWEDE ST EMS 16 12 Thu 2015-12-10
4 40.2514920 -75.6033497 CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... NaN EMS: DIZZINESS 2015-12-10 16:56:52 LOWER POTTSGROVE CHERRYWOOD CT & DEAD END EMS 16 12 Thu 2015-12-10

Exploratory Data Analysis (EDA)

In [12]:
# Number of calls per reason.
# cufflinks' DataFrame.iplot takes `kind` as its first positional argument, so
# a list of plotly traces has to be rendered with plotly's own offline iplot;
# the axis titles go into the figure layout.
data = [go.Histogram(x=df['Reason'])]
layout = go.Layout(xaxis=dict(title='Reason'), yaxis=dict(title='Count'))
iplot(go.Figure(data=data, layout=layout))

Hour

In [13]:
# Number of calls per hour of the day.
# cufflinks' DataFrame.iplot takes `kind` as its first positional argument, so
# a list of plotly traces has to be rendered with plotly's own offline iplot;
# the axis titles go into the figure layout.
data = [go.Histogram(x=df['Hour'])]
layout = go.Layout(xaxis=dict(title='Hour'), yaxis=dict(title='Count'))
iplot(go.Figure(data=data, layout=layout))

Day of Week

In [14]:
# Number of calls per day of the week.
# cufflinks' DataFrame.iplot takes `kind` as its first positional argument, so
# a list of plotly traces has to be rendered with plotly's own offline iplot;
# the axis titles go into the figure layout.
data = [go.Histogram(x=df['Day of Week'])]
layout = go.Layout(xaxis=dict(title='Day of Week'), yaxis=dict(title='Count'))
iplot(go.Figure(data=data, layout=layout))

Month

In [15]:
# Number of calls per month.
# cufflinks' DataFrame.iplot takes `kind` as its first positional argument, so
# a list of plotly traces has to be rendered with plotly's own offline iplot;
# the axis titles go into the figure layout.
data = [go.Histogram(x=df['Month'])]
layout = go.Layout(xaxis=dict(title='Month'), yaxis=dict(title='Count'))
iplot(go.Figure(data=data, layout=layout))

Month by Reason

In [16]:
# One histogram trace of call months for each of the three reasons, in the
# order EMS, Fire, Traffic.
data = [
    go.Histogram(x=df.loc[df['Reason'] == reason, 'Month'], name=reason)
    for reason in ('EMS', 'Fire', 'Traffic')
]

layout = go.Layout(
    xaxis={'title': 'Month'},
    yaxis={'title': 'Count'},
    bargap=0.2,
    bargroupgap=0.1,
)

fig = go.Figure(data=data, layout=layout)
# py is plotly.plotly, so the figure is sent to the plotly account under the
# name 'Month'.
py.iplot(fig, filename='Month')
/Users/yuanhu/anaconda2/lib/python2.7/site-packages/plotly/plotly/plotly.py:224: UserWarning:

Woah there! Look at all those points! Due to browser limitations, the Plotly SVG drawing functions have a hard time graphing more than 500k data points for line charts, or 40k points for other types of charts. Here are some suggestions:
(1) Use the `plotly.graph_objs.Scattergl` trace object to generate a WebGl graph.
(2) Trying using the image API to return an image instead of a graph URL
(3) Use matplotlib
(4) See if you can create your visualization with fewer data points

If the visualization you're using aggregates points (e.g., box plot, histogram, etc.) you can disregard this warning.

Out[16]:

Date

In [20]:
# Calls per calendar day (count of non-null 'twp' entries per Date), drawn with
# cufflinks. Selecting the column before count() avoids counting every column.
# No plt.tight_layout() here: the chart is a plotly figure, and the call only
# produced an empty matplotlib figure in the cell output.
calls_per_day = df.groupby('Date')['twp'].count()
calls_per_day.iplot(xTitle='Date', yTitle='Count')
<matplotlib.figure.Figure at 0x10cf24590>

Date by Reason

In [18]:
reasons = ['EMS', 'Fire', 'Traffic']

# One column of daily call counts (non-null 'twp' entries) per reason, aligned
# on the date; a date with no calls for a reason is NaN in that column.
calls_by_date = pd.DataFrame({
    reason: df[df['Reason'] == reason].groupby('Date')['twp'].count()
    for reason in reasons
})
calls_by_date.index.name = 'Date'
calls_by_date = calls_by_date.reset_index()

# One line per reason; `data` is only ever the list of traces in this cell.
data = [
    go.Scatter(
        x=calls_by_date['Date'],
        y=calls_by_date[reason],
        name=reason,
        opacity=0.8,
    )
    for reason in reasons
]

# The title describes what is plotted; no axis range is set in this figure.
layout = dict(
    title="911 Calls per Day by Reason",
)

fig = dict(data=data, layout=layout)
# The filename is unchanged, so the plot keeps the name under which it is
# stored in the plotly account.
py.iplot(fig, filename="Manually Set Range")
High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~yuan_hu/0 or inside your plot.ly account where it is named 'Manually Set Range'
Out[18]:
In [17]:
# Call counts with one row per day of week and one column per hour.
dayHour = df.groupby(by=['Day of Week', 'Hour'])['Reason'].count().unstack()

# Axis labels are taken from the table itself so they always match the rows
# and columns of z, instead of repeating them as hard-coded lists.
trace = go.Heatmap(
    z=dayHour.values,
    x=list(dayHour.columns),
    y=list(dayHour.index),
    colorscale='Viridis',
)
data = [trace]

# Axis titles belong in the layout; the y axis shows the day of week.
layout = go.Layout(
    xaxis=dict(title='Hour'),
    yaxis=dict(title='Day of Week'),
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Heatmap')
Out[17]:
In [23]:
# Raw count matrix behind the heatmap (rows: day of week, columns: hour).
dayHour.values
Out[23]:
array([[ 716,  643,  577,  522,  493,  631, 1070, 1722, 2007, 2111, 2239,
        2365, 2453, 2510, 2479, 2688, 2804, 2721, 2140, 2041, 1752, 1497,
        1416, 1109],
       [ 746,  572,  542,  493,  546,  711, 1064, 1806, 2267, 2320, 2173,
        2278, 2398, 2238, 2296, 2508, 2617, 2617, 2254, 1814, 1591, 1257,
        1077,  818],
       [ 914,  804,  718,  651,  597,  626,  767, 1021, 1302, 1656, 1921,
        2056, 2174, 2152, 2134, 2053, 2065, 2055, 1982, 1896, 1640, 1503,
        1324, 1206],
       [ 952,  835,  815,  665,  578,  618,  730,  939, 1192, 1519, 1674,
        1742, 1942, 1782, 1902, 1780, 1802, 1853, 1822, 1700, 1436, 1267,
         986,  904],
       [ 713,  545,  591,  491,  488,  666, 1126, 1837, 2131, 2247, 2233,
        2245, 2308, 2446, 2410, 2568, 2571, 2761, 2274, 1872, 1631, 1443,
        1153,  927],
       [ 717,  585,  532,  503,  516,  699, 1122, 1847, 2321, 2307, 2273,
        2372, 2373, 2391, 2422, 2612, 2781, 2677, 2347, 1893, 1649, 1405,
        1101,  843],
       [ 684,  606,  516,  516,  452,  668, 1160, 1915, 2180, 2224, 2242,
        2278, 2424, 2320, 2437, 2529, 2835, 2870, 2303, 1887, 1715, 1419,
        1154,  883]])
In [24]:
# Latitude column as a NumPy array (values are strings, as loaded).
df['lat'].values
Out[24]:
array(['40.2978759', '40.2580614', '40.1211818', ..., '40.1173880',
       '40.1655642', '40.0923772'], dtype=object)
In [27]:
# First ten longitudes as a NumPy array (values are strings, as loaded).
df['lng'].values[:10]
Out[27]:
array(['-75.5812935', '-75.2646799', '-75.3519752', '-75.3435130',
       '-75.6033497', '-75.2832450', '-75.1277951', '-75.4051820',
       '-75.3995896', '-75.2914577'], dtype=object)
In [28]:
# Plot the first ten calls as markers on a Mapbox map, with the call title as
# hover text.
first_calls = df.head(10)

# The graph objects are referenced through the `go` namespace and the trace
# list / marker are plain list / dict, so the cell does not rely on the
# wildcard import or on the Data and Marker wrapper classes.
# lat/lng are loaded as strings, so they are converted to numbers here.
data = [
    go.Scattermapbox(
        lat=first_calls['lat'].astype(float),
        lon=first_calls['lng'].astype(float),
        mode='markers',
        marker=dict(size=9),
        text=first_calls['title'],
    )
]

layout = go.Layout(
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # Fixed map centre, same values as before.
        center=dict(lat=40, lon=-75),
        pitch=0,
        zoom=10,
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='Multiple Mapbox')
Out[28]: